store_data <- read.csv("Superstore _data.csv")
library(ggplot2)
library(tidyverse)
library(plotly)
library(lubridate)
store_data |>
mutate(unitPrice = ifelse(Discount > 0,(Sales/Quantity)/(1-Discount),Sales/Quantity),
subtotal = unitPrice*Quantity,
costPrice = Sales-Profit,
markup = 100*Profit/costPrice) |>
group_by(Category,Sub.Category) |>
summarise(revenue = sum(subtotal),
gross_profit = as.integer(sum(revenue)-sum(costPrice)))->gp_category
## `summarise()` has grouped output by 'Category'. You can override using the `.groups` argument.
gp_category |>
mutate(grossMargin=(gross_profit/revenue)*100) ->gp_category
ggplotly(ggplot(gp_category,aes(Category,revenue,fill = Sub.Category))+
geom_bar(stat = "identity")+
labs(x= "Categories", y = "Revenue",
title = "Revenue Generated by Each Categories and Sub-Categories")+
theme(axis.text.x = element_text(angle = 45)))
options(scipen = 999)
gp_category |>
pivot_longer(revenue:gross_profit,
names_to = "legend",
values_to = "Amount")->pivot_gp_category
options(scipen = 999)
ggplotly(ggplot(pivot_gp_category,aes(Sub.Category,Amount,fill = legend))
+geom_bar(stat = "identity")+theme(
axis.text.x = element_text(angle = 90))+
labs(x= "Sub-Categories", y = "Amount(Dollars)",
title = "Revenue and Gross Profit Generated of each Sub-Categories"))
options(scipen = 999)
ggplotly(ggplot(pivot_gp_category,aes(Category,Amount,fill = legend))
+geom_bar(stat = "identity", position = "dodge")+theme(
axis.text.x = element_text(angle = 90))+
labs(x="Categories",y="Amount(Dollars)",
title = "Revenue and Gross Profit Generated of each Categories"))
store_data |>
group_by(Category,Sub.Category) |>
count() |>
ggplot(aes(x = Category, y = n , fill = Sub.Category))+geom_col()+
theme (axis.text.x = element_text(angle = 45))+
labs(x="Categories",y="No.of Transactions",
title = "No.of Transactions of Sub-Categories in Each Category")-> sale_volume
ggplotly(sale_volume)
store_data |>
mutate(unitPrice = ifelse(Discount > "0",(Sales/Quantity)/(1-Discount),Sales/Quantity),
subtotal = unitPrice*Quantity) |>
group_by(Customer.ID,Customer.Name,Segment) |>
summarise(total_revenue = sum(subtotal),
total_orders = n_distinct(Order.ID),
total_profit = sum(Profit)) -> customers_based
## `summarise()` has grouped output by 'Customer.ID', 'Customer.Name'. You can override using the `.groups` argument.
ggplotly(ggplot(customers_based |> filter(total_profit > "0")|>
group_by(Segment) |>
summarise(profits = sum(total_profit)),aes(Segment,profits,fill = Segment))+
geom_col()+labs(x="Names of segments of Customers",y="Profits",
title = "Profits In Each segments of Customers"))
ggplotly(ggplot(customers_based |>
group_by(Segment) |>
summarise(total_customers = n(),
segment_revenue = sum(total_revenue)), aes(total_customers,segment_revenue, fill = Segment))
+geom_col(width = 200,alpha = 0.65, position = "stack")+
labs(x="No.of Customers",y="Revenue",title = "Graph of Segment Size"))
## Warning: position_stack requires non-overlapping x intervals
store_data |>
mutate(unitPrice = ifelse(Discount > 0,(Sales/Quantity)/(1-Discount),Sales/Quantity),
subtotal = unitPrice*Quantity,
order_date = strptime(Order.Date,format = "%m/%d/%Y"),
ship_date = strptime(Ship.Date,format = "%m/%d/%Y"),
order_year = lubridate::year(order_date),
order_month = lubridate::month(order_date,label = TRUE, abbr = FALSE ),
weekday = weekdays(order_date)) |>
group_by(order_year,weekday) |>
summarise(n_transactions = n()) -> weekdays_trends
## `summarise()` has grouped output by 'order_year'. You can override using the `.groups` argument.
ggplotly(ggplot(weekdays_trends,aes(n_transactions,weekday, fill = weekday ))+geom_col()+
facet_wrap(~order_year)+labs(x="No.of Transactions",y = "Week Days",
title = "Graph of Sales in weekdays in Different Years"))
store_data |>
mutate(unitPrice = ifelse(Discount > 0,(Sales/Quantity)/(1-Discount),Sales/Quantity),
subtotal = unitPrice*Quantity,
order_date = strptime(Order.Date,format = "%m/%d/%Y"),
ship_date = strptime(Ship.Date,format = "%m/%d/%Y"),
order_year = lubridate::year(order_date),
order_month = lubridate::month(order_date,label = TRUE, abbr = FALSE ),
weekday = weekdays(order_date)) |>
group_by(order_year,weekday) |>
summarise(n_transactions = n()) -> weekdays_trends
## `summarise()` has grouped output by 'order_year'. You can override using the `.groups` argument.
ggplotly(ggplot(weekdays_trends,aes(n_transactions,weekday, fill = weekday ))+geom_col()+
facet_wrap(~order_year)+labs(x="No.of Transactions",y = "Week Days",
title = "Graph of Sales in weekdays in Different Years"))
#No. of Sales of Categories in Months of Years(2015-2018):
store_data |>
mutate(unitPrice = ifelse(Discount > 0,(Sales/Quantity)/(1-Discount),Sales/Quantity),
order_date = strptime(Order.Date,format = "%m/%d/%Y"),
order_year = lubridate::year(order_date),
order_month = month(order_date,label = TRUE),
subtotal = unitPrice*Quantity) |>
group_by(Segment,order_year,order_month) |>
summarise( n_transactions = n()) -> segment_months
## `summarise()` has grouped output by 'Segment', 'order_year'. You can override using the `.groups` argument.
ggplot(segment_months,aes((order_month),Segment,colour= Segment,size = n_transactions ))+
geom_point(alpha =.6)+theme(axis.text.x = element_text(angle = 90))+
facet_wrap(~order_year)+labs(x="",y="Segment",title = "Graph of No. of transactions By Each Segment(Monthly)")
store_data |>
mutate(unitPrice = ifelse(Discount > 0,(Sales/Quantity)/(1-Discount),Sales/Quantity),
subtotal = unitPrice*Quantity) |>
group_by(Country,Order.ID) |>
summarise(revenue = sum(subtotal)) |>
group_by(Country) |>
summarise(total_revenue = sum(revenue),
n_transactions = n_distinct(Order.ID),
avg_order_value = mean(revenue)) -> country_based
## `summarise()` has grouped output by 'Country'. You can override using the `.groups` argument.
ggplotly(
ggplot(country_based,aes(Country,total_revenue,fill = Country))+
geom_bar(stat = "identity")+theme(axis.text.x = element_text(angle = 45))+
labs(x="",y="Revenue",title = "Graph of Total Revenue against Each Country"))
ggplotly(ggplot(country_based,aes(n_transactions,avg_order_value,color = Country))+
geom_point(alpha = 0.6, size = 3)+
labs(x="No.of transactions",y="Average Order Value",
title = "Graph of Average Invoice Value and No. of Sales in Countries"))
date_today <- lubridate::ymd("20190130")
store_data |>
mutate(order_date = strptime(Order.Date,"%m/%d/%Y"))|>
group_by(Customer.ID,Region) |>
summarise(last_buy = max(order_date)) |>
mutate(days_lastPurchse = difftime(date_today,last_buy,units = "days"),
days_lastPurchse = as.integer((days_lastPurchse))) -> region_based
## `summarise()` has grouped output by 'Customer.ID'. You can override using the `.groups` argument.
region_based |>
mutate(months_lastPurchase = days_lastPurchse/30,
months_lastPurchase = as.integer(months_lastPurchase)) -> region_based
ggplotly(ggplot(region_based,aes(Region,months_lastPurchase,
shape = Region,fill = Region))+geom_boxplot()+
labs(x="",y=" Time Since Last Sale (months)"))
store_data |>
mutate(unitPrice = ifelse(Discount > 0,(Sales/Quantity)/(1-Discount),Sales/Quantity),
subtotal = unitPrice*Quantity,
order_date = strptime(Order.Date,"%m/%d/%Y"))|>
group_by(Customer.ID) |>
summarise(revenue = sum(subtotal),
last_buy = max(order_date),
first_buy = min(order_date)) |>
mutate(days_relation = difftime(last_buy,first_buy, units = "days"),
months = days_relation/30,
months = as.integer(months)) -> customer_relation
ggplotly(ggplot(customer_relation |>
filter(months > "0"),aes(months))+
geom_bar()+
labs( x="No. of Months", y = "No. of Customers",
title = "Customer Relation With Bussiness In Months"))
store_data |>
mutate(unitPrice = ifelse(Discount > 0,(Sales/Quantity)/(1-Discount),Sales/Quantity),
subtotal = unitPrice*Quantity,
order_date = strptime(Order.Date,format = "%m/%d/%Y"),
order_year = year(order_date))|>
group_by(order_year) |>
summarise(revenue = sum(subtotal),
n_transactions = n()) -> year_trend
ggplotly(ggplot(year_trend,aes(order_year,revenue))+
geom_line(color = "darkblue",size = 1)+geom_point(color = "red")+
labs( x = "Years", y = "Revenue",title = "No.of Transactions Trends 2015-2018"))
ggplotly(ggplot(year_trend,aes(order_year,n_transactions))+
geom_line(color = "darkblue",size = 1)+geom_point( color = "red")+
labs( x = "Years", y = "No-of-Transactions",title = "No.of Transactions Trends 2015-2018"))